# Show the column types and first values of `mpg` before plotting it.
str(mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame':    234 obs. of  11 variables:
##  $ manufacturer: chr  "audi" "audi" "audi" "audi" ...
##  $ model       : chr  "a4" "a4" "a4" "a4" ...
##  $ displ       : num  1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int  1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int  4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr  "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr  "f" "f" "f" "f" ...
##  $ cty         : int  18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int  29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr  "p" "p" "p" "p" ...
##  $ class       : chr  "compact" "compact" "compact" "compact" ...
# Quick plot of `hwy` against `displ`; x and y are passed positionally.
qplot(displ, hwy, data = mpg)

Modifying aesthetics

# Same displ/hwy plot, with the colour aesthetic mapped to `drv`.
qplot(displ, hwy, data = mpg, color = drv)

Adding a geom

# Request two geoms at once: the points plus a smoother drawn over them.
qplot(
  displ, hwy,
  data = mpg,
  geom = c("point", "smooth")
)
## `geom_smooth()` using method = 'loess'

Histograms

# Only x is supplied, so qplot bins `hwy`; bar fill is mapped to `drv`.
qplot(hwy, data = mpg, fill = drv)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Facets

# `. ~ drv`: one panel per `drv` value, laid out as columns.
qplot(displ, hwy, data = mpg, facets = . ~ drv)

# `drv ~ .`: one panel per `drv` value, laid out as rows; bins are 2 wide.
qplot(hwy, data = mpg, binwidth = 2, facets = drv ~ .)

Reading the MAACS data

# Restore the saved objects from disk; the calls below use `maacs` from it.
load("~/Downloads/maacs.Rda")
# First six rows.
head(maacs)
##   id eno duBedMusM   pm25 mopos
## 1  1 141      2423 15.560   yes
## 2  2 124      2793 34.370   yes
## 3  3 126      3055 38.953   yes
## 4  4 164       775 33.249   yes
## 5  5  99      1634 27.060   yes
## 6  6  68       939 18.890   yes
# Column types and dimensions.
str(maacs)
## 'data.frame':    750 obs. of  5 variables:
##  $ id       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ eno      : num  141 124 126 164 99 68 41 50 12 30 ...
##  $ duBedMusM: num  2423 2793 3055 775 1634 ...
##  $ pm25     : num  15.6 34.4 39 33.2 27.1 ...
##  $ mopos    : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...

Histogram of eNO

# Histogram of log(eno); the transformation is applied inside the call.
qplot(log(eno), data = maacs)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 108 rows containing non-finite values (stat_bin).

Histogram by groups

# Same histogram, with bar fill mapped to the `mopos` factor.
qplot(log(eno), data = maacs, fill = mopos)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 108 rows containing non-finite values (stat_bin).

Density smooth

# Density curve of log(eno) over all rows.
qplot(log(eno), data = maacs, geom = "density")
## Warning: Removed 108 rows containing non-finite values (stat_density).

# One density curve per `mopos` level, distinguished by colour.
qplot(log(eno), data = maacs, color = mopos, geom = "density")
## Warning: Removed 108 rows containing non-finite values (stat_density).

Scatterplots

# Plain scatterplot of log(eno) against log(pm25).
ggplot(maacs, aes(log(pm25), log(eno))) +
  geom_point()
## Warning: Removed 184 rows containing missing values (geom_point).

# Same scatterplot, with point shape mapped to `mopos`.
ggplot(maacs, aes(log(pm25), log(eno), shape = mopos)) +
  geom_point()
## Warning: Removed 184 rows containing missing values (geom_point).

# Colour is mapped in the plot-level aes, so the linear smoother is also
# fitted and drawn separately for each `mopos` level.
ggplot(maacs, aes(log(pm25), log(eno), color = mopos)) +
  geom_point() +
  geom_smooth(method = "lm")
## Warning: Removed 184 rows containing non-finite values (stat_smooth).

## Warning: Removed 184 rows containing missing values (geom_point).

Using facets

# Store the points + linear-smooth plot so it can be drawn on demand.
gg <- ggplot(maacs, aes(log(pm25), log(eno))) +
  geom_point() +
  geom_smooth(method = "lm")

plot(gg)
## Warning: Removed 184 rows containing non-finite values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).

# Facetted version built with qplot: one column of panels per `mopos` level,
# with a linear smoother added on top of the points.
qplot(log(pm25), log(eno), data = maacs, facets = . ~ mopos) +
  geom_smooth(method = "lm")
## Warning: Removed 184 rows containing non-finite values (stat_smooth).

## Warning: Removed 184 rows containing missing values (geom_point).

# gg + facet_wrap(.~mopos)

Building up in layers

# Load the second saved file, which replaces `maacs` in the workspace.
load("~/Downloads/maacs (1).Rda")
# Peek at columns 6 through 8 only.
head(maacs[, 6:8])
##    logpm25 NocturnalSympt        bmicat
## 1 1.192010              0 normal weight
## 2 1.536180              0    overweight
## 3 1.590541              2    overweight
## 4 1.521779              2 normal weight
## 5 1.432328              2 normal weight
## 6 1.276232              2 normal weight

Plotting

load("~/Downloads/maacs (1).Rda")
# Data and aesthetic mappings only; no layers have been added yet.
g <- ggplot(maacs, aes(logpm25, NocturnalSympt))
# Print a text description of the plot object (data, mapping, faceting).
summary(g)
## data: id, eno, duBedMusM, pm25, mopos, logpm25, NocturnalSympt,
##   bmicat, logno2_new [750x9]
## mapping:  x = logpm25, y = NocturnalSympt
## faceting: <ggproto object: Class FacetNull, Facet>
##     compute_layout: function
##     draw_back: function
##     draw_front: function
##     draw_labels: function
##     draw_panels: function
##     finish_data: function
##     init_scales: function
##     map: function
##     map_data: function
##     params: list
##     render_back: function
##     render_front: function
##     render_panels: function
##     setup_data: function
##     setup_params: function
##     shrink: TRUE
##     train: function
##     train_positions: function
##     train_scales: function
##     vars: function
##     super:  <ggproto object: Class FacetNull, Facet>
# Draw the layer-less plot object.
print(g)

# Add a point layer and draw the result.
p <- g + geom_point()
print(p)
## Warning: Removed 134 rows containing missing values (geom_point).

Adding more layers: smooth and facets

# Points plus a smoother with its default method.
g +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess'
## Warning: Removed 134 rows containing non-finite values (stat_smooth).
## Warning: Removed 134 rows containing missing values (geom_point).

# Points plus a linear-model smoother.
g +
  geom_point() +
  geom_smooth(method = "lm")
## Warning: Removed 134 rows containing non-finite values (stat_smooth).

## Warning: Removed 134 rows containing missing values (geom_point).

# Same layers, split into one column of panels per `bmicat` level.
g +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(. ~ bmicat)
## Warning: Removed 134 rows containing non-finite values (stat_smooth).

## Warning: Removed 134 rows containing missing values (geom_point).

Modifying aesthetics

# Constant colour: set outside aes(), so every point gets the same value.
g +
  geom_point(size = 4, alpha = 0.5, color = "steelblue")
## Warning: Removed 134 rows containing missing values (geom_point).

# Mapped colour: inside aes(), so the colour follows the `bmicat` column.
g +
  geom_point(aes(color = bmicat), size = 4, alpha = 0.5)
## Warning: Removed 134 rows containing missing values (geom_point).

Modifying labels

# Title and both axis labels are set in a single labs() call; the x label is
# a plotmath expression so that 2.5 is rendered as a subscript of PM.
g +
  geom_point(aes(color = bmicat)) +
  labs(
    title = "MAACS Cohort",
    x = expression("log " * PM[2.5]),
    y = "Nocturnal Symptoms"
  )
## Warning: Removed 134 rows containing missing values (geom_point).

Customizing the smooth

# Small, half-transparent points coloured by `bmicat`, with a thick linear
# fit (linetype 3) drawn without its confidence band.
g +
  geom_point(aes(color = bmicat), size = 2, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 4, linetype = 3)
## Warning: Removed 134 rows containing non-finite values (stat_smooth).
## Warning: Removed 134 rows containing missing values (geom_point).

Changing the theme

# Switch to the black-and-white theme and request the "Times" font family.
g +
  geom_point(aes(color = bmicat)) +
  theme_bw(base_family = "Times")
## Warning: Removed 134 rows containing missing values (geom_point).

Notes about Axis Limits

With base plot

# Simulated series used to compare axis-limit handling: 100 standard-normal
# draws indexed 1..100, with one extreme value injected in the middle.
set.seed(1)  # fix the RNG so the data and the figures are reproducible
testdat <- data.frame(x = 1:100, y = rnorm(100))
testdat$y[50] <- 100  # outlier
# Base graphics: ylim restricts the visible y range to [-3, 3], so the line
# runs off the top of the plot at the outlier.
plot(testdat$x, testdat$y, type = "l", ylim = c(-3, 3))

With ggplot

# Rebind `g` to a plot of the simulated series; no y limits are set here.
g <- ggplot(testdat, aes(x, y))
g +
  geom_line()

Exploring the maacs data again

# Reload the saved file and show the first six rows of `maacs`.
load("~/Downloads/maacs (1).Rda")
head(maacs)
##   id eno duBedMusM   pm25 mopos  logpm25 NocturnalSympt        bmicat
## 1  1 141      2423 15.560   yes 1.192010              0 normal weight
## 2  2 124      2793 34.370   yes 1.536180              0    overweight
## 3  3 126      3055 38.953   yes 1.590541              2    overweight
## 4  4 164       775 33.249   yes 1.521779              2 normal weight
## 5  5  99      1634 27.060   yes 1.432328              2 normal weight
## 6  6  68       939 18.890   yes 1.276232              2 normal weight
##   logno2_new
## 1   1.617849
## 2   1.884490
## 3   1.712953
## 4   1.458879
## 5   1.294510
## 6   1.468377
# Tertile boundaries of logno2_new (missing values ignored). The outer
# parentheses print the result as well as assigning it.
(cutpoints <- quantile(maacs$logno2_new, seq(0, 1, length.out = 4), na.rm = TRUE))
##         0%  33.33333%  66.66667%       100% 
## -0.6289321  1.1828710  1.4418993  2.4775279
# Bin logno2_new into the three tertiles. cut() builds left-open intervals by
# default, which would turn the observation equal to the minimum cutpoint into
# NA; include.lowest = TRUE closes the first interval on the left instead.
maacs$no2dec <- cut(maacs$logno2_new, cutpoints, include.lowest = TRUE)

levels(maacs$no2dec)
## [1] "[-0.629,1.18]" "(1.18,1.44]"   "(1.44,2.48]"

Code for final plot

# Base object for the final figure: NocturnalSympt against logpm25.
g <- ggplot(maacs, aes(logpm25, NocturnalSympt))

## Add layers

# Semi-transparent points under a linear fit (no confidence band), one panel
# per bmicat/no2dec combination on a 2 x 4 grid, with custom labels and theme.
g +
  geom_point(alpha = 1 / 3) +
  geom_smooth(method = "lm", se = FALSE, col = "steelblue") +
  facet_wrap(bmicat ~ no2dec, nrow = 2, ncol = 4) +
  labs(
    title = "MAACS Cohort",
    x = expression("log " * PM[2.5]),
    y = "Nocturnal Symptoms"
  ) +
  theme_bw(base_family = "Avenir", base_size = 10)
## Warning: Removed 134 rows containing non-finite values (stat_smooth).
## Warning: Removed 134 rows containing missing values (geom_point).